package com.liusu.spider.webmagic;

import org.junit.jupiter.api.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that pulls three sets of fragments out of a
 * downloaded page and stores them as result fields on the {@link Page}.
 *
 * <p>The class also carries a JUnit test method, {@link #processor()}, that
 * starts a spider using a fresh instance of this processor.
 */
public class PageProcessorObject implements PageProcessor {

    /** CSS selector whose matches are stored under the {@code a_list} field. */
    private static final String A_LIST_CSS =
            "div.headswiper-content > div > div.headswiper-item a";

    /** XPath expression whose matches are stored under the {@code a} field. */
    private static final String A_XPATH =
            "//*[@id=\"floor-www-index_558\"]/div/div[2]/div[1]/div[1]/div[1]/dl/dd[1]/a";

    /** Regular expression whose matches are stored under the {@code img} field. */
    private static final String IMG_REGEX = "<img.*?>";

    /** URL handed to the spider as its starting request. */
    private static final String START_URL = "https://www.csdn.net/";

    /** Site configuration returned by {@link #getSite()}; created with {@code Site.me()}. */
    public Site site = Site.me();

    /**
     * Runs the CSS, XPath and regex extractions against the page HTML, in that
     * order, and stores every match of each one as a result field.
     *
     * @param page the page whose HTML is queried and on which the fields are set
     */
    @Override
    public void process(Page page) {
        java.util.List<String> cssMatches = page.getHtml().css(A_LIST_CSS).all();
        page.putField("a_list", cssMatches);

        java.util.List<String> xpathMatches = page.getHtml().xpath(A_XPATH).all();
        page.putField("a", xpathMatches);

        java.util.List<String> regexMatches = page.getHtml().regex(IMG_REGEX).all();
        page.putField("img", regexMatches);
    }

    /**
     * Returns the site configuration held by this processor.
     *
     * @return the current value of {@link #site}
     */
    @Override
    public Site getSite() {
        return this.site;
    }

    /**
     * Launches a crawl in three steps: create a spider around a new instance of
     * this page processor, register the start URL, then run the spider.
     */
    @Test
    public void processor() {
        // Steps 1 and 2: build the spider with its processor and seed URL.
        Spider crawler = Spider.create(new PageProcessorObject()).addUrl(START_URL);
        // Step 3: start the crawl.
        crawler.run();
    }

}
